#!/usr/bin/env python
# -*- coding: utf-8 -*-

from src.manager.tree_manager import TreeManager


def create_data_set():
    """Return the hard-coded sample data set and its feature names.

    Returns:
        A ``(data_set, labels)`` tuple. ``data_set`` is a list of five rows,
        each holding two 0/1 values followed by a ``'yes'``/``'no'`` string.
        ``labels`` is a list with the two feature names, in column order.
        New list objects are created on every call.
    """
    feature_names = ['no surfacing', 'flippers']
    samples = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    return samples, feature_names


def main():
    """Build a decision tree from the sample data and classify one sample.

    Prints the tree returned by ``TreeManager.create_tree`` and then the value
    returned by ``TreeManager.classify`` for the feature vector ``[1, 1]``.

    The commented-out snippets at the end of this function are alternative
    experiments (persisting a tree, picking the best split feature, splitting
    the data set, computing Shannon entropy); uncomment one to try it.
    """
    # Create the decision tree and use it for classification.
    my_data, labels = create_data_set()
    tree_manager = TreeManager()
    # Hand create_tree a copy of the labels: per the original note in this
    # script, the list passed to create_tree is no longer accurate afterwards,
    # and classify needs the complete, untouched list of feature names.
    my_tree = tree_manager.create_tree(my_data, list(labels))
    print(my_tree)
    result = tree_manager.classify(my_tree, labels, [1, 1])
    print(result)

    # Store the decision tree, then load it back from the file.
    # my_data, labels = create_data_set()
    # tree_manager = TreeManager()
    # my_tree = tree_manager.create_tree(my_data, labels)
    # tree_manager.write_tree(my_tree, 'classifierStorage.txt')
    # result = tree_manager.read_tree('classifierStorage.txt')
    # print(result)

    # Choose the best feature for splitting the data set.
    # my_data, labels = create_data_set()
    # print(my_data, labels)
    # tree_manager = TreeManager()
    # result = tree_manager.choose_best_feature_to_split(my_data)
    # print(result)

    # Split the data set.
    # my_data, labels = create_data_set()
    # tree_manager = TreeManager()
    # result = tree_manager.split_data_set(my_data, 0, 1)
    # print(result)
    # result = tree_manager.split_data_set(my_data, 0, 0)
    # print(result)

    # Compute the Shannon entropy.
    # my_data, labels = create_data_set()
    # tree_manager = TreeManager()
    # result = tree_manager.calculate_shannon_entropy(my_data)
    # print(result)


if __name__ == "__main__":
    main()
